========================================================================= 
Log Path: /Users/wenhao/Dropbox/Research/Inverted Liquidity Crises/Data/Firm quality and financial availability - Wenhao's version/output/log/log_B_CompustatAnnual.log 
Program Path: /Users/wenhao/Dropbox/Research/Inverted Liquidity Crises/Data/Firm quality and financial availability - Wenhao's version/code/0_DownloadData/2_CompustatAnnual.R 
Working Directory: /Users/wenhao/Dropbox/Research/Inverted Liquidity Crises/Data/Firm quality and financial availability - Wenhao's version 
User Name: wenhao 
R Version: 4.4.0 (2024-04-24) 
Machine: Wenhaos-MacBook-2020.local x86_64 
Operating System: Darwin 23.4.0 Darwin Kernel Version 23.4.0: Wed Feb 21 21:44:31 PST 2024; root:xnu-10063.101.15~2/RELEASE_X86_64 
Base Packages: stats graphics grDevices utils datasets methods base 
Other Packages: tidylog_1.1.0 zoo_1.8-12 dtplyr_1.3.1 RPostgres_1.4.7 procs_1.0.6 reporter_1.4.4 libr_1.3.3 logr_1.3.8 fmtr_1.6.5 common_1.1.3 sassy_1.2.5 here_1.0.1 data.table_1.15.4 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0 
Log Start Time: 2024-07-08 10:34:27.833077 
========================================================================= 

Warning: incomplete final line found on '/Users/wenhao/Dropbox/Research/Inverted Liquidity Crises/Data/Firm quality and financial availability - Wenhao's version/code/0_DownloadData/2_CompustatAnnual.R' 

> library(tidyverse)
> library(data.table)
> library(dtplyr)
> library(lubridate)
> library(stringr)
> library(zoo)
> library(here)
> library(sassy)
> library(RPostgres)
> 
> # Open the log
> lf <- log_open(file.path(here("output", "log"), "log_B_CompustatAnnual.log"), autolog = T, show_notes = F)
> 
> # Send code to the log
> log_code()
> 
> # 1 Read data from WRDS -------------------------------------------------------------------
> sep("1 Read data from WRDS")
> 
> wrds <- dbConnect(Postgres(),
>                   host='wrds-pgdata.wharton.upenn.edu',
>                   port=9737,
>                   dbname='wrds',
>                   sslmode='require',
>                   user='tiangeyeusc')
> 
> SQL_statement <- 
>   "
>   SELECT a.gvkey, a.datadate, a.conm, a.fyear, a.tic, a.cusip, a.naicsh, a.sich, 
> 	        a.aco,a.act,a.ajex,a.am,a.ao,a.ap,a.at,a.capx,a.ceq,a.ceqt,a.che,a.cogs,
> 		      a.csho,a.cshrc,a.dcpstk,a.dcvt,a.dlc,a.dlcch,a.dltis,a.dltr,
> 		      a.dltt,a.dm,a.dp,a.drc,a.drlt,a.dv,a.dvc,a.dvp,a.dvpa,a.dvpd,
> 		      a.dvpsx_c,a.dvt,a.ebit,a.ebitda,a.emp,a.epspi,a.epspx,a.fatb,a.fatl,
> 		      a.ffo,a.fincf,a.fopt,a.gdwl,a.gdwlia,a.gdwlip,a.gwo,a.ib,a.ibcom,
> 		      a.intan,a.invt,a.ivao,a.ivncf,a.ivst,a.lco,a.lct,a.lo,a.lt,a.mib,
> 		      a.msa,a.ni,a.nopi,a.oancf,a.ob,a.oiadp,a.oibdp,a.pi,a.ppenb,a.ppegt,
> 		      a.ppenls,
> 		      a.ppent,a.prcc_c,a.prcc_f,a.prstkc,a.prstkcc,a.pstk,a.pstkl,a.pstkrv,
> 		      a.re,a.rect,a.recta,a.revt,a.sale,a.scstkc,a.seq,a.spi,a.sstk,
> 		      a.tstkp,a.txdb,a.txdi,a.txditc,a.txfo,a.txfed,a.txp,a.txt,
> 		      a.wcap,a.wcapch,a.xacc,a.xad,a.xint,a.xrd,a.xpp,a.xsga
> 	FROM COMP.FUNDA as a
> 	WHERE a.consol = 'C'
> 	AND a.popsrc = 'D'
> 	AND a.datafmt = 'STD'
> 	AND a.curcd = 'USD'
> 	AND a.indfmt = 'INDL'
> "
> 
> res <- dbSendQuery(conn = wrds, statement = SQL_statement)
> df_CompustatAnnual <- dbFetch(res)
> dbClearResult(res)
> 
> # 2 Some cleaning -------------------------------------------------------------------
> sep("2 Some cleaning")
> 
> # Step 1: Drop rows with missing values in specific columns
> df_CompustatAnnual <- df_CompustatAnnual %>%
>   filter(!is.na(at) & !is.na(prcc_c) & !is.na(ni))
> 
> # Step 2: Generate a new column 'cnum' with the first 6 characters of 'cusip'
> df_CompustatAnnual <- df_CompustatAnnual %>%
>   mutate(cnum = substr(cusip, 1, 6))
> 
> # Initialize 'dr' (deferred revenue) column with missing values
> # drc = deferred revenue current 
> # drlt = deferred revenue long-term
> df_CompustatAnnual <- df_CompustatAnnual %>%
>   mutate(dr = NA) %>%
>   mutate(dr = ifelse(!is.na(drc) & !is.na(drlt), drc + drlt, dr)) %>%
>   mutate(dr = ifelse(!is.na(drc) & is.na(drlt), drc, dr)) %>%
>   mutate(dr = ifelse(is.na(drc) & !is.na(drlt), drlt, dr))
> 
> 
> # Initialize 'dc' column with missing values
> # dcpstk = convertible debt and preferred stock, pstk = preferred stock, dcvt = convertible debt
> df_CompustatAnnual <- df_CompustatAnnual %>%
>   mutate(dc = NA) %>%
>   mutate(dc = ifelse(dcpstk > pstk & !is.na(pstk) & !is.na(dcpstk) & is.na(dcvt), dcpstk - pstk, dc)) %>%
>   mutate(dc = ifelse(is.na(pstk) & !is.na(dcpstk) & is.na(dcvt), dcpstk, dc)) %>% 
>   mutate(dc = ifelse(is.na(dc), dcvt, dc))
> 
> # xint = interest and related expense
> df_CompustatAnnual <- df_CompustatAnnual %>%
>   mutate(xint0 = ifelse(!is.na(xint), xint, 0))
> 
> # xsga = Selling, general and administrative expenses
> df_CompustatAnnual <- df_CompustatAnnual %>%
>   mutate(xsga0 = ifelse(!is.na(xsga), xsga, 0))
> 
> # Initialize xad0 column with 0
> df_CompustatAnnual <- df_CompustatAnnual %>%
>   mutate(xad0 = ifelse(!is.na(xad), xad, 0))
> 
> # Define the columns to replace missing values with 0
> zero_cols <- c("nopi", "dvt", "ob", "dm", "dc", "aco", "ap", "intan", "ao", "lco", "lo", "rect", 
>                "invt", "drc", "spi", "gdwl", "che", "dp", "act", "lct", "tstkp", "dvpa", "scstkc", 
>                "sstk", "mib", "ivao", "prstkc", "prstkcc", "txditc", "ivst")
> 
> # Replace missing values with 0 in the specified columns using %>% (pipe)
> df_CompustatAnnual <- df_CompustatAnnual %>%
>   mutate(across(all_of(zero_cols), ~ifelse(is.na(.), 0, .)))
> 
> 
> # 3 Merge with CRSP linking table ------------------------------------------------------------------
> sep("3 Merge with CRSP linking table")
> 
> df_CCMLinkingTable <- readRDS(here("data", "Link", "CCMLinkingTable.RDS"))
> 
> df_CompustatAnnual <- df_CompustatAnnual %>% inner_join(df_CCMLinkingTable, by = "gvkey")
> df_CompustatAnnual <- df_CompustatAnnual %>%
>   filter(timelinkstart_d <= datadate & (datadate <= timelinkend_d | is.na(timelinkend_d)))
> 
> # Assuming 6-month reporting lag
> df_CompustatAnnual <- df_CompustatAnnual %>%
>   mutate(time_avail_m = as.Date(datadate) %m+% months(6)) %>% 
>   mutate(time_avail_m = year(time_avail_m) * 100 + month(time_avail_m))
> 
> # Annual version: Remove unwanted columns
> df_CompustatAnnual <- df_CompustatAnnual %>%
>   select(-timelinkstart_d, -timelinkend_d, -linkprim, -linktype, -liid)
> 
> df_CompustatAnnual <- df_CompustatAnnual %>% select(gvkey, permno, everything())
> 
> # save
> fwrite(df_CompustatAnnual, here("data", "Compustat", "a_aCompustat.csv"))
> saveRDS(df_CompustatAnnual, here("data", "Compustat", "a_aCompustat.RDS"))
> 
> # 4 Monthly version ------------------------------------------------------------------
> sep("4 Monthly version")
> 
> df_m_aCompustat <- df_CompustatAnnual
> 
> setDT(df_m_aCompustat)
> # Expand each row by repeating 12 times
> df_m_aCompustat <- df_m_aCompustat[, .SD[rep(1:.N, each = 12)], by = .(gvkey)] 
> # Create 'tempTime' column as a copy of 'time_avail_m'
> df_m_aCompustat[, tempTime := time_avail_m] 
> 
> # Update 'time_avail_m' based on group-specific calculations
> df_m_aCompustat[, datadate2 := datadate %m+% months(0:(.N - 1)), by = .(gvkey, tempTime)]
> df_m_aCompustat[, datadate2 := as.Date(datadate2) %m+% months(6)] 
> df_m_aCompustat[, time_avail_m := year(datadate2)*100+month(datadate2)]
> 
> # Drop 'temp' and 'tempTime' columns
> df_m_aCompustat <- df_m_aCompustat %>% 
>   select(-tempTime, datadate2)
> 
> # Keep rows based on conditions using .I
> # this affects .14% of observations that had changes of fiscal year ends.
> # In that case, we keep the more recent info
> df_m_aCompustat <- df_m_aCompustat %>%
>   arrange(gvkey, time_avail_m, datadate) %>%
>   group_by(gvkey, time_avail_m) %>%
>   filter(row_number() == n()) %>%
>   ungroup() %>% 
>   as.data.table()
> 
> # This affects an additional 89/3m observation
> df_m_aCompustat <- df_m_aCompustat %>%
>   arrange(permno, time_avail_m, datadate) %>%
>   group_by(permno, time_avail_m) %>%
>   filter(row_number() == n()) %>%
>   ungroup() %>% 
>   as.data.table()
> 
> # save
> fwrite(df_m_aCompustat, here("data", "Intermediate", "m_aCompustat.csv"))
> saveRDS(df_m_aCompustat, here("data", "Intermediate", "m_aCompustat.RDS"))
> 
> # Close log
> log_close()
> 
> # View results
> writeLines(readLines(lf))

========================================================================= 
1 Read data from WRDS 
========================================================================= 

========================================================================= 
2 Some cleaning 
========================================================================= 

filter: removed 145,429 rows (29%), 359,999 rows remaining

mutate: new variable 'cnum' (character) with 29,124 unique values and 0% NA

mutate: new variable 'dr' (logical) with one unique value and 100% NA

mutate: converted 'dr' from logical to double (122810 fewer NA)

mutate: changed 1,231 values (<1%) of 'dr' (1,231 fewer NAs)

mutate: changed 6,727 values (2%) of 'dr' (6,727 fewer NAs)

mutate: new variable 'dc' (logical) with one unique value and 100% NA

mutate: converted 'dc' from logical to double (1772 fewer NA)

mutate: changed 9,199 values (3%) of 'dc' (9,199 fewer NAs)

mutate: changed 304,137 values (84%) of 'dc' (304,137 fewer NAs)

mutate: new variable 'xint0' (double) with 62,039 unique values and 0% NA

mutate: new variable 'xsga0' (double) with 128,385 unique values and 0% NA

mutate: new variable 'xad0' (double) with 26,624 unique values and 0% NA

mutate: changed 31,353 values (9%) of 'aco' (31,353 fewer NAs)

        changed 56,269 values (16%) of 'act' (56,269 fewer NAs)

        changed 2,698 values (1%) of 'ao' (2,698 fewer NAs)

        changed 24,241 values (7%) of 'ap' (24,241 fewer NAs)

        changed 3,136 values (1%) of 'che' (3,136 fewer NAs)

        changed 123,410 values (34%) of 'dm' (123,410 fewer NAs)

        changed 15,593 values (4%) of 'dp' (15,593 fewer NAs)

        changed 235,958 values (66%) of 'drc' (235,958 fewer NAs)

        changed 85,526 values (24%) of 'dvpa' (85,526 fewer NAs)

        changed 1,894 values (1%) of 'dvt' (1,894 fewer NAs)

        changed 135,865 values (38%) of 'gdwl' (135,865 fewer NAs)

        changed 31,321 values (9%) of 'intan' (31,321 fewer NAs)

        changed 10,007 values (3%) of 'invt' (10,007 fewer NAs)

        changed 19,545 values (5%) of 'ivao' (19,545 fewer NAs)

        changed 48,236 values (13%) of 'ivst' (48,236 fewer NAs)

        changed 49,784 values (14%) of 'lco' (49,784 fewer NAs)

        changed 52,412 values (15%) of 'lct' (52,412 fewer NAs)

        changed 3,132 values (1%) of 'lo' (3,132 fewer NAs)

        changed 15,926 values (4%) of 'mib' (15,926 fewer NAs)

        changed 1,262 values (<1%) of 'nopi' (1,262 fewer NAs)

        changed 271,384 values (75%) of 'ob' (271,384 fewer NAs)

        changed 64,930 values (18%) of 'prstkc' (64,930 fewer NAs)

        changed 350,740 values (97%) of 'prstkcc' (350,740 fewer NAs)

        changed 9,158 values (3%) of 'rect' (9,158 fewer NAs)

        changed 350,585 values (97%) of 'scstkc' (350,585 fewer NAs)

        changed 17,257 values (5%) of 'spi' (17,257 fewer NAs)

        changed 54,052 values (15%) of 'sstk' (54,052 fewer NAs)

        changed 72,929 values (20%) of 'tstkp' (72,929 fewer NAs)

        changed 38,539 values (11%) of 'txditc' (38,539 fewer NAs)

        changed 44,891 values (12%) of 'dc' (44,891 fewer NAs)

========================================================================= 
3 Merge with CRSP linking table 
========================================================================= 

inner_join: added 10 columns (cik, sic, naics, linkprim, linktype, …)

            > rows only in x                  ( 22,917)

            > rows only in df_CCMLinkingTable (  1,967)

            > matched rows                     425,928    (includes duplicates)

            >                                 =========

            > rows total                       425,928

filter: removed 128,890 rows (30%), 297,038 rows remaining

Error: object 'datadate' not found 
Traceback: 
No traceback available 

========================================================================= 
Log End Time: 2024-07-08 10:52:53.25278 
Log Elapsed Time: 0 00:18:25 
========================================================================= 
